import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, convert_to_datetime, get_max, to_list, get_min_value, sort_by_values, get_first_n_rows, create_date_offset, filter_by_date, concatenate_objects, reset_index, rename_columns, bind_dataframe

# --- Ten best-ranked players over the most recent five years of data ---

# The data directory is supplied as the first command-line argument.
data_dir = sys.argv[1]
atp_tennis = read_csv_file(os.path.join(data_dir, 'atp_tennis.csv'))

# Parse 'Date' so that the date arithmetic and filtering below operate on datetimes.
atp_tennis['Date'] = convert_to_datetime(atp_tennis['Date'])

# The window is anchored on the latest date found in the data, not on today's date.
last_date = get_max(atp_tennis['Date'])
five_years_ago = last_date - create_date_offset(years=5)

# NOTE(review): filter_by_date presumably keeps rows dated on/after the cutoff —
# confirm against the decision_company helper.
recent_matches = filter_by_date(atp_tennis, 'Date', five_years_ago)

# Every match lists two players. Relabel the second player's columns so both
# sides can be stacked into a single ('Player_1', 'Rank_1') table.
player_2_as_player_1 = rename_columns(
    recent_matches[['Player_2', 'Rank_2']],
    {'Player_2': 'Player_1', 'Rank_2': 'Rank_1'},
)
concatenated_data = concatenate_objects(
    recent_matches[['Player_1', 'Rank_1']],
    player_2_as_player_1,
)

# Per player: take the minimum 'Rank_1' seen in the window, order by it and
# keep the first ten rows.
grouped_data = bind_dataframe(concatenated_data, 'Player_1')
min_values = get_min_value(grouped_data)
sorted_data = sort_by_values(min_values, 'Rank_1')
top_10_ranked_players = get_first_n_rows(sorted_data, 10)

# reset_index(drop=False) turns the index back into a column before the rows
# are converted to a plain Python list; later code reads the player name from
# position 0 of each entry.
top_10_with_index_column = reset_index(top_10_ranked_players, drop=False)
top_10_ranked_players_list = to_list(top_10_with_index_column)

print(top_10_ranked_players_list)
# pickle.dump(top_10_ranked_players_list,open("./ref_result/top_10_ranked_players_list.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, check_elements_in_list, assert_series, logical_and, logical_or, count_unique_values, fill_missing_values, create_dataframe, reset_index


# --- Grass-court win rate for each of the top-10 players ---

# Entry 0 of each row in top_10_ranked_players_list is the player name;
# strip stray whitespace from it.
top_10_player_names = [player[0].strip() for player in top_10_ranked_players_list]

# Mask of recent matches in which at least one side is a top-10 player.
player_1_in_top_10 = check_elements_in_list(recent_matches['Player_1'], top_10_player_names)
player_2_in_top_10 = check_elements_in_list(recent_matches['Player_2'], top_10_player_names)
matches_with_top_10_players = logical_or(player_1_in_top_10, player_2_in_top_10)

# Mask of recent matches played on grass.
matches_on_grass = assert_series(recent_matches['Surface'], 'Grass', 'equality')

grass_matches = recent_matches[logical_and(matches_on_grass, matches_with_top_10_players)]

# Matches played = appearances as Player_1 plus appearances as Player_2.
# A plain `+` aligns the two count Series on player name and yields NaN for
# anyone present in only one of them, which would then be zeroed out and lose
# real matches. `add(fill_value=0)` treats the missing side as 0 instead.
# (Assumes count_unique_values returns a pandas Series of counts indexed by
# value, as the original `+` / label indexing already required.)
player_1_counts = count_unique_values(grass_matches['Player_1'])
player_2_counts = count_unique_values(grass_matches['Player_2'])
matches_played = player_1_counts.add(player_2_counts, fill_value=0)

# reindex (rather than label indexing) so a top-10 player with no grass match
# gets 0 instead of raising KeyError.
matches_played = matches_played.reindex(top_10_player_names, fill_value=0)

# Matches won, with the same protection for players who never won on grass.
matches_won = count_unique_values(grass_matches['Winner'])
matches_won = matches_won.reindex(top_10_player_names, fill_value=0)

# Win rate in percent. A player with no matches gives 0/0 = NaN; report 0 for
# them, matching how the per-round breakdown later in this script treats
# "no matches played".
win_rate_percentage = fill_missing_values((matches_won / matches_played) * 100, 0)

# One row per player. The three Series share the top_10_player_names index in
# the same order, so the 'Player' list lines up with them positionally.
win_rate_table = create_dataframe({'Player': top_10_player_names, 'Matches Played': matches_played, 'Matches Won': matches_won, 'Win Rate Percentage': win_rate_percentage})
win_rate_table = reset_index(win_rate_table)

print(win_rate_table)
# pickle.dump(win_rate_table,open("./ref_result/win_rate_table.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, create_zeros_array, create_dataframe, check_elements_in_list, iterate_rows, access_dataframe_loc, update_dataframe_loc


# --- Head-to-head grass-court wins among the top-10 players ---
# Relies on grass_matches and top_10_player_names defined earlier in the script.
# Cell [winner, loser] counts how many times `winner` beat `loser`.

# Size the zero matrix from the name list instead of hard-coding 10x10, so the
# array shape always agrees with the row/column labels passed below.
player_count = len(top_10_player_names)
head_to_head_array = create_zeros_array((player_count, player_count))
head_to_head_matrix = create_dataframe(head_to_head_array, columns=top_10_player_names, index=top_10_player_names)

# Keep only the grass matches in which BOTH players belong to the top 10;
# otherwise the winner/loser labels would not all exist in the matrix.
both_players_in_top_10 = (check_elements_in_list(grass_matches['Player_1'], top_10_player_names)) & (check_elements_in_list(grass_matches['Player_2'], top_10_player_names))
head_to_head_matches = grass_matches[both_players_in_top_10]

# Tally each match into the winner's row and the loser's column.
for index, row in iterate_rows(head_to_head_matches):
    winner = row['Winner']
    # The loser is whichever of the two listed players is not the winner.
    loser = row['Player_1'] if row['Player_1'] != winner else row['Player_2']
    current_value = access_dataframe_loc(head_to_head_matrix, winner, loser)
    update_dataframe_loc(head_to_head_matrix, winner, loser, current_value + 1)

print(head_to_head_matrix)
# pickle.dump(head_to_head_matrix,open("./ref_result/head_to_head_matrix.pkl","wb"))

import pandas as pd
import numpy as np
import pickle
from decision_company import read_csv_file, assert_series, logical_or, logical_and, create_dataframe


# --- Per-round grass-court performance of each top-10 player ---
# Relies on grass_matches and top_10_player_names defined earlier in the script.

# Reporting buckets. 'Early Round' groups the four labels listed in
# early_round_labels; every other bucket is matched against the 'Round'
# column by its own name.
# NOTE(review): these strings must equal the values stored in the 'Round'
# column exactly — confirm the dataset spells them this way (e.g. 'Final'
# vs. 'The Final').
rounds = ['Early Round', 'Quarterfinals', 'Semifinals', 'Final']
early_round_labels = ['1st Round', '2nd Round', '3rd Round', '4th Round']

# The round masks do not depend on the player, so build them once up front
# instead of once per player/round combination.
round_column = grass_matches['Round']
early_round_mask = assert_series(round_column, early_round_labels[0], 'equality')
for early_label in early_round_labels[1:]:
    early_round_mask = logical_or(early_round_mask, assert_series(round_column, early_label, 'equality'))

round_masks = {
    bucket: early_round_mask if bucket == 'Early Round' else assert_series(round_column, bucket, 'equality')
    for bucket in rounds
}

# Collects one dict per (player, round) pair, players outermost.
performance_data_list = []

for player in top_10_player_names:
    # Matches in which this player took part on either side; computed once
    # per player and reused for every round bucket.
    player_mask = logical_or(
        assert_series(grass_matches['Player_1'], player, 'equality'),
        assert_series(grass_matches['Player_2'], player, 'equality'),
    )

    for round_name in rounds:
        player_round_matches = grass_matches[logical_and(round_masks[round_name], player_mask)]

        # NOTE: matches_played / matches_won are rebound to plain ints here,
        # replacing the same-named Series built in the win-rate section above.
        matches_played = len(player_round_matches)
        matches_won = len(player_round_matches[assert_series(player_round_matches['Winner'], player, 'equality')])

        # Guard against division by zero: no matches in this bucket -> 0.
        win_rate = (matches_won / matches_played) * 100 if matches_played > 0 else 0

        performance_data_list.append({'Player': player, 'Round': round_name, 'Matches Played': matches_played, 'Matches Won': matches_won, 'Win Rate': win_rate})

# Convert the collected rows to a DataFrame
performance_data = create_dataframe(performance_data_list)

print(performance_data)
# pickle.dump(performance_data,open("./ref_result/performance_data.pkl","wb"))

